In [1]:
    
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
    
`%matplotlib inline` makes Matplotlib-based plots (including Seaborn's) render directly below each cell, without an explicit `plt.show()` call.
In [2]:
    
# Read the training set from train.csv into a DataFrame.
train = pd.read_csv(filepath_or_buffer="train.csv")
    
In [3]:
    
# Read the test set from test.csv into a DataFrame.
test = pd.read_csv(filepath_or_buffer="test.csv")
    
We use pandas to load the training and test CSV files.
In [4]:
    
# Preview the first five rows of the training data.
train.head(n=5)
    
    Out[4]:
In [6]:
    
# Mean SalePrice for each distinct MSSubClass value.
sns.barplot(x='MSSubClass', y='SalePrice', data=train)
    
    Out[6]:
    
MSSubClass: Identifies the type of dwelling involved in the sale
In [7]:
    
# Mean SalePrice for each distinct MSZoning value.
sns.barplot(x='MSZoning', y='SalePrice', data=train)
    
    Out[7]:
    
In [8]:
    
# Scatter plot of SalePrice vs. LotFrontage with a fitted linear regression line.
sns.lmplot(x='LotFrontage', y='SalePrice', data=train, fit_reg=True)
# Note that the regression line is highly impacted by outliers
    
    Out[8]:
    
In [9]:
    
# Scatter plot of SalePrice vs. LotArea with a fitted linear regression line.
sns.lmplot(x='LotArea', y='SalePrice', data=train, fit_reg=True)
# Note that the regression line is highly impacted by outliers
    
    Out[9]:
    
In [10]:
    
# Mean SalePrice for each distinct Street value.
sns.barplot(x='Street', y='SalePrice', data=train)
    
    Out[10]:
    
In [11]:
    
# Mean SalePrice for each distinct Alley value.
sns.barplot(x='Alley', y='SalePrice', data=train)
    
    Out[11]:
    
In [12]:
    
# Mean SalePrice for each distinct LotShape value.
sns.barplot(x='LotShape', y='SalePrice', data=train)
# LotShape codes:
#   Reg  Regular
#   IR1  Slightly irregular
#   IR2  Moderately irregular
#   IR3  Irregular
    
    Out[12]:
    
In [13]:
    
# Mean SalePrice for each distinct LandContour value.
sns.barplot(x='LandContour', y='SalePrice', data=train)
    
    Out[13]:
    
In [14]:
    
# Mean SalePrice for each distinct Utilities value.
sns.barplot(x='Utilities', y='SalePrice', data=train)
    
    Out[14]:
    
In [15]:
    
# Mean SalePrice for each distinct LotConfig value.
sns.barplot(x='LotConfig', y='SalePrice', data=train)
    
    Out[15]:
    
In [16]:
    
# Mean SalePrice for each distinct LandSlope value.
sns.barplot(x='LandSlope', y='SalePrice', data=train)
    
    Out[16]:
    
In [17]:
    
# Mean SalePrice for each distinct Neighborhood value.
sns.barplot(x='Neighborhood', y='SalePrice', data=train)
    
    Out[17]:
    
In [18]:
    
# Mean SalePrice for each distinct Condition1 value.
sns.barplot(x='Condition1', y='SalePrice', data=train)
    
    Out[18]:
    
In [19]:
    
# Mean SalePrice for each distinct Condition2 value.
sns.barplot(x='Condition2', y='SalePrice', data=train)
    
    Out[19]:
    
In [20]:
    
# Mean SalePrice for each distinct BldgType value.
sns.barplot(x='BldgType', y='SalePrice', data=train)
    
    Out[20]:
    
In [21]:
    
# Mean SalePrice for each distinct HouseStyle value.
sns.barplot(x='HouseStyle', y='SalePrice', data=train)
    
    Out[21]:
    
In [105]:
    
# Mean SalePrice for each distinct OverallQual value.
sns.barplot(x='OverallQual', y='SalePrice', data=train)
    
    Out[105]:
    
In [104]:
    
# Mean SalePrice for each distinct OverallCond value.
sns.barplot(x='OverallCond', y='SalePrice', data=train)
# Author's note: the wide range of values for 5 distorts the regression line.
# NOTE(review): this bar plot draws no regression line; the note appears to have
# been written for a regression plot of the same column - confirm or reword.
    
    Out[104]:
    
In [27]:
    
# Scatter plot of SalePrice vs. YearBuilt with a fitted linear regression line.
sns.lmplot(x='YearBuilt', y='SalePrice', data=train, fit_reg=True)
    
    Out[27]:
    
In [29]:
    
# Scatter plot of SalePrice vs. YearRemodAdd with a fitted linear regression line.
sns.lmplot(x='YearRemodAdd', y='SalePrice', data=train, fit_reg=True)
    
    Out[29]:
    
In [31]:
    
# Mean SalePrice for each distinct RoofStyle value.
sns.barplot(x='RoofStyle', y='SalePrice', data=train)
    
    Out[31]:
    
In [33]:
    
# Mean SalePrice for each distinct RoofMatl value.
sns.barplot(x='RoofMatl', y='SalePrice', data=train)
    
    Out[33]:
    
In [34]:
    
# Mean SalePrice for each distinct Exterior1st value.
sns.barplot(x='Exterior1st', y='SalePrice', data=train)
    
    Out[34]:
    
In [35]:
    
# Mean SalePrice for each distinct Exterior2nd value.
sns.barplot(x='Exterior2nd', y='SalePrice', data=train)
    
    Out[35]:
    
In [36]:
    
# Mean SalePrice for each distinct MasVnrType value.
sns.barplot(x='MasVnrType', y='SalePrice', data=train)
    
    Out[36]:
    
In [40]:
    
# Scatter plot of SalePrice vs. MasVnrArea with a fitted linear regression line.
sns.lmplot(x='MasVnrArea', y='SalePrice', data=train, fit_reg=True)
# The high number of cases with an area = 0 is causing bias in the regression line
    
    Out[40]:
    
In [41]:
    
# Mean SalePrice for each distinct ExterQual value.
sns.barplot(x='ExterQual', y='SalePrice', data=train)
# Seems that exterior quality is associated with higher sale price compared to...
# NOTE(review): the note above ends mid-sentence; complete the comparison.
    
    Out[41]:
    
In [45]:
    
# Mean SalePrice for each distinct ExterCond value.
sns.barplot(x='ExterCond', y='SalePrice', data=train)
# ExterCond: exterior condition
    
    Out[45]:
    
In [42]:
    
# Mean SalePrice for each distinct Foundation value.
sns.barplot(x='Foundation', y='SalePrice', data=train)
# Poured concrete is more likely to be associated with higher sale prices
    
    Out[42]:
    
In [43]:
    
# Mean SalePrice for each distinct BsmtQual value.
sns.barplot(x='BsmtQual', y='SalePrice', data=train)
# BsmtQual evaluates the height of the basement
# There is no "Po" (Poor, <70 inches)
# Seems that a high basement ceiling is associated with a much higher sale price
    
    Out[43]:
    
In [44]:
    
# Mean SalePrice for each distinct BsmtCond value.
sns.barplot(x='BsmtCond', y='SalePrice', data=train)
# Strange that the good quality basements would be associated with higher sale price
# This may suggest that basement condition is not as important
# NOTE(review): the original note was cut off at "not as import"; confirm the
# intended conclusion (and what it was being compared against).
    
    Out[44]:
    
In [46]:
    
# Mean SalePrice for each distinct BsmtExposure value.
sns.barplot(x='BsmtExposure', y='SalePrice', data=train)
    
    Out[46]:
    
In [47]:
    
# Mean SalePrice for each distinct BsmtFinType1 value.
sns.barplot(x='BsmtFinType1', y='SalePrice', data=train)
    
    Out[47]:
    
In [49]:
    
# Scatter plot of SalePrice vs. BsmtFinSF1 with a fitted linear regression line.
sns.lmplot(x='BsmtFinSF1', y='SalePrice', data=train, fit_reg=True)
# Regression line is again impacted by the high number of houses with area of 0
    
    Out[49]:
    
In [50]:
    
# Mean SalePrice for each distinct BsmtFinType2 value.
sns.barplot(x='BsmtFinType2', y='SalePrice', data=train)
    
    Out[50]:
    
In [52]:
    
# Scatter plot of SalePrice vs. BsmtFinSF2 with a fitted linear regression line.
sns.lmplot(x='BsmtFinSF2', y='SalePrice', data=train, fit_reg=True)
    
    Out[52]:
    
In [54]:
    
# Scatter plot of SalePrice vs. BsmtUnfSF with a fitted linear regression line.
sns.lmplot(x='BsmtUnfSF', y='SalePrice', data=train, fit_reg=True)
    
    Out[54]:
    
In [55]:
    
# Scatter plot of SalePrice vs. TotalBsmtSF with a fitted linear regression line.
sns.lmplot(x='TotalBsmtSF', y='SalePrice', data=train, fit_reg=True)
    
    Out[55]:
    
In [56]:
    
# Mean SalePrice for each distinct Heating value.
sns.barplot(x='Heating', y='SalePrice', data=train)
    
    Out[56]:
    
In [57]:
    
# Mean SalePrice for each distinct HeatingQC value.
sns.barplot(x='HeatingQC', y='SalePrice', data=train)
    
    Out[57]:
    
In [58]:
    
# Mean SalePrice for each distinct CentralAir value.
sns.barplot(x='CentralAir', y='SalePrice', data=train)
    
    Out[58]:
    
In [59]:
    
# Mean SalePrice for each distinct Electrical value.
sns.barplot(x='Electrical', y='SalePrice', data=train)
    
    Out[59]:
    
In [60]:
    
# Scatter plot of SalePrice vs. 1stFlrSF with a fitted linear regression line.
sns.lmplot(x='1stFlrSF', y='SalePrice', data=train, fit_reg=True)
    
    Out[60]:
    
In [61]:
    
# Scatter plot of SalePrice vs. 2ndFlrSF with a fitted linear regression line.
sns.lmplot(x='2ndFlrSF', y='SalePrice', data=train, fit_reg=True)
    
    Out[61]:
    
In [62]:
    
# Scatter plot of SalePrice vs. LowQualFinSF with a fitted linear regression line.
sns.lmplot(x='LowQualFinSF', y='SalePrice', data=train, fit_reg=True)
    
    Out[62]:
    
In [63]:
    
# Scatter plot of SalePrice vs. GrLivArea with a fitted linear regression line.
sns.lmplot(x='GrLivArea', y='SalePrice', data=train, fit_reg=True)
    
    Out[63]:
    
In [66]:
    
# Mean SalePrice for each distinct BsmtFullBath value.
sns.barplot(x='BsmtFullBath', y='SalePrice', data=train)
    
    Out[66]:
    
In [67]:
    
# Mean SalePrice for each distinct BsmtHalfBath value.
sns.barplot(x='BsmtHalfBath', y='SalePrice', data=train)
    
    Out[67]:
    
In [71]:
    
# Mean SalePrice for each distinct BedroomAbvGr value.
sns.barplot(x='BedroomAbvGr', y='SalePrice', data=train)
    
    Out[71]:
    
In [72]:
    
# Mean SalePrice for each distinct KitchenAbvGr value.
sns.barplot(x='KitchenAbvGr', y='SalePrice', data=train)
    
    Out[72]:
    
In [73]:
    
# Mean SalePrice for each distinct KitchenQual value.
sns.barplot(x='KitchenQual', y='SalePrice', data=train)
    
    Out[73]:
    
In [75]:
    
# Mean SalePrice for each distinct TotRmsAbvGrd value.
sns.barplot(x='TotRmsAbvGrd', y='SalePrice', data=train)
    
    Out[75]:
    
In [76]:
    
# Mean SalePrice for each distinct Functional value.
sns.barplot(x='Functional', y='SalePrice', data=train)
    
    Out[76]:
    
In [77]:
    
# Mean SalePrice for each distinct Fireplaces value.
sns.barplot(x='Fireplaces', y='SalePrice', data=train)
    
    Out[77]:
    
In [78]:
    
# Mean SalePrice for each distinct FireplaceQu value.
sns.barplot(x='FireplaceQu', y='SalePrice', data=train)
    
    Out[78]:
    
In [79]:
    
# Mean SalePrice for each distinct GarageType value.
sns.barplot(x='GarageType', y='SalePrice', data=train)
    
    Out[79]:
    
In [81]:
    
# Scatter plot of SalePrice vs. GarageYrBlt with a fitted linear regression line.
sns.lmplot(x='GarageYrBlt', y='SalePrice', data=train, fit_reg=True)
    
    Out[81]:
    
In [82]:
    
# Mean SalePrice for each distinct GarageFinish value.
sns.barplot(x='GarageFinish', y='SalePrice', data=train)
    
    Out[82]:
    
In [83]:
    
# Mean SalePrice for each distinct GarageCars value.
sns.barplot(x='GarageCars', y='SalePrice', data=train)
    
    Out[83]:
    
In [84]:
    
# Scatter plot of SalePrice vs. GarageArea with a fitted linear regression line.
sns.lmplot(x='GarageArea', y='SalePrice', data=train, fit_reg=True)
    
    Out[84]:
    
In [86]:
    
# Mean SalePrice for each distinct GarageQual value.
sns.barplot(x='GarageQual', y='SalePrice', data=train)
    
    Out[86]:
    
In [85]:
    
# Mean SalePrice for each distinct GarageCond value.
sns.barplot(x='GarageCond', y='SalePrice', data=train)
    
    Out[85]:
    
In [87]:
    
# Mean SalePrice for each distinct PavedDrive value.
sns.barplot(x='PavedDrive', y='SalePrice', data=train)
    
    Out[87]:
    
In [88]:
    
# Scatter plot of SalePrice vs. WoodDeckSF with a fitted linear regression line.
sns.lmplot(x='WoodDeckSF', y='SalePrice', data=train, fit_reg=True)
    
    Out[88]:
    
In [89]:
    
# Scatter plot of SalePrice vs. OpenPorchSF with a fitted linear regression line.
sns.lmplot(x='OpenPorchSF', y='SalePrice', data=train, fit_reg=True)
    
    Out[89]:
    
In [90]:
    
# Scatter plot of SalePrice vs. EnclosedPorch with a fitted linear regression line.
sns.lmplot(x='EnclosedPorch', y='SalePrice', data=train, fit_reg=True)
    
    Out[90]:
    
In [91]:
    
# Scatter plot of SalePrice vs. 3SsnPorch with a fitted linear regression line.
sns.lmplot(x='3SsnPorch', y='SalePrice', data=train, fit_reg=True)
    
    Out[91]:
    
In [92]:
    
# Scatter plot of SalePrice vs. ScreenPorch with a fitted linear regression line.
sns.lmplot(x='ScreenPorch', y='SalePrice', data=train, fit_reg=True)
    
    Out[92]:
    
In [93]:
    
# Scatter plot of SalePrice vs. PoolArea with a fitted linear regression line.
sns.lmplot(x='PoolArea', y='SalePrice', data=train, fit_reg=True)
    
    Out[93]:
    
In [95]:
    
# Mean SalePrice for each distinct PoolQC value.
sns.barplot(x='PoolQC', y='SalePrice', data=train)
    
    Out[95]:
    
In [96]:
    
# Mean SalePrice for each distinct Fence value.
sns.barplot(x='Fence', y='SalePrice', data=train)
    
    Out[96]:
    
In [97]:
    
# Mean SalePrice for each distinct MiscFeature value.
sns.barplot(x='MiscFeature', y='SalePrice', data=train)
    
    Out[97]:
    
In [99]:
    
# Scatter plot of SalePrice vs. MiscVal with a fitted linear regression line.
sns.lmplot(x='MiscVal', y='SalePrice', data=train, fit_reg=True)
    
    Out[99]:
    
In [100]:
    
# Mean SalePrice for each distinct MoSold value.
sns.barplot(x='MoSold', y='SalePrice', data=train)
    
    Out[100]:
    
In [101]:
    
# Mean SalePrice for each distinct YrSold value.
sns.barplot(x='YrSold', y='SalePrice', data=train)
    
    Out[101]:
    
In [102]:
    
# Mean SalePrice for each distinct SaleType value.
sns.barplot(x='SaleType', y='SalePrice', data=train)
    
    Out[102]:
    
In [103]:
    
# Mean SalePrice for each distinct SaleCondition value.
sns.barplot(x='SaleCondition', y='SalePrice', data=train)
    
    Out[103]: